## Load estimates ----
main_ames <- rio::import("est/stayer_analyses_marginal_effects_main.csv")
main_desc <- rio::import("est/stayer_analyses_descriptives_main.csv")
main_info <-
  rio::import("est/stayer_analyses_model_info_main.csv") %>%
  dplyr::mutate(spec = dplyr::case_when(
    is.na(spec) ~ "All",
    spec == "ltr_since_3 == 0" ~ "Moved last 3", 
    spec == "ltr_since_5 == 0" ~ "Moved last 5", 
    spec == "ltr_since_3 == 1" ~ "Since 3", 
    spec == "ltr_since_5 == 1" ~ "Since 5"
  ))

aux_ames <- rio::import("est/stayer_analyses_marginal_effects_aux.csv")
aux_desc <- rio::import("est/stayer_analyses_descriptives_aux.csv")
aux_info <-
  rio::import("est/stayer_analyses_model_info_aux.csv")


## Appendix plots ----
## Load objects
stayer_coef <-
  rio::import("est/stayer_analyses_model_coefficients_aux.csv") %>%
  dplyr::filter(model_id %in% c(5, 20)) %>%
  dplyr::select(-V1) %>%
  dplyr::bind_rows(
    rio::import(
      "est/stayer_analyses_model_coefficients_main.csv"
    ) %>%
      dplyr::filter(model_id %in% c(389)) %>%
      dplyr::select(-V1)
  )


## Harmonize variable names
colnames(stayer_coef) <- colnames(stayer_coef) %>%
  gsub("X.", "", .) %>%
  gsub("\\._", "\\_", .) %>%
  gsub(".cold_rent_sqm_cwu_imp1.1", ".1.cold_rent_sqm_cwu_imp1", .) %>%
  gsub(".cold_rent_sqm_cwu_imp2.1", ".1.cold_rent_sqm_cwu_imp2", .) %>%
  gsub(".cold_rent_sqm_cwu_imp3.1", ".1.cold_rent_sqm_cwu_imp3", .) %>%
  gsub(".cold_rent_sqm_cwu_imp4.1", ".1.cold_rent_sqm_cwu_imp4", .) %>%
  gsub(".cold_rent_sqm_cwu_imp5.1", ".1.cold_rent_sqm_cwu_imp5", .) %>%
  gsub(".cold_rent_sqm_umn_imp1.1", ".1.cold_rent_sqm_umn_imp1", .) %>%
  gsub(".cold_rent_sqm_umn_imp2.1", ".1.cold_rent_sqm_umn_imp2", .) %>%
  gsub(".cold_rent_sqm_umn_imp3.1", ".1.cold_rent_sqm_umn_imp3", .) %>%
  gsub(".cold_rent_sqm_umn_imp4.1", ".1.cold_rent_sqm_umn_imp4", .) %>%
  gsub(".cold_rent_sqm_umn_imp5.1", ".1.cold_rent_sqm_umn_imp5", .) %>%
  gsub("gtyp5urban...500k_imp1.1", "gtyp5urban...500k.1_imp1", .) %>%
  gsub("gtyp5urban...500k_imp2.1", "gtyp5urban...500k.1_imp2", .) %>%
  gsub("gtyp5urban...500k_imp3.1", "gtyp5urban...500k.1_imp3", .) %>%
  gsub("gtyp5urban...500k_imp4.1", "gtyp5urban...500k.1_imp4", .) %>%
  gsub("gtyp5urban...500k_imp5.1", "gtyp5urban...500k.1_imp5", .)

## Recode x_names in stayer_coef (to match column names)
stayer_coef <- stayer_coef %>%
  mutate(x_names = x_names %>%
           as.character() %>%
           ifelse(
             startsWith(., "gtyp5urban > 500k"),
             paste0(substr(., 1, nchar("gtyp5urban > 500k")),
                    ".1",
                    substr(., nchar("gtyp5urban > 500k") + 1, nchar(.))),
             .
           )) %>%
  mutate(x_names = x_names %>%
           as.character() %>%
           ifelse(endsWith(., "gtyp5urban > 500k"),
                  paste0(., ".1"),
                  .)) %>%
  mutate(
    x_names = x_names %>%
      gsub("\\(", "", .) %>%
      gsub("\\)", "", .) %>%
      gsub("<", "\\.", .) %>%
      gsub(">", "\\.", .) %>%
      gsub("=", "\\.", .) %>%
      gsub(" ", "\\.", .) %>%
      gsub("\\:", "\\.", .) %>%
      gsub("&", "\\.", .) %>%
      gsub("-", "\\.", .) %>%
      gsub("\\`", "", .)
  )

## Split stayer_coef by model_id, drop empty columns
stayer_coef <- stayer_coef %>%
  group_by(model_id) %>%
  group_split %>%
  lapply(select_if, all_na)

## Stack by imputations
stayer_est <- stayer_coef %>%
  lapply(.,
         function(mod) {
           varying <- mod %>%
             dplyr::select(contains("_imp")) %>%
             names() %>%
             split(
               .,
               mod %>%
                 dplyr::select(contains("_imp")) %>%
                 names() %>%
                 sapply(function(name)
                   substr(name, 1, nchar(name) - 5)) %>%
                 factor(
                   .,
                   levels = mod %>%
                     dplyr::select(contains("_imp")) %>%
                     names() %>%
                     sapply(function(name)
                       substr(name, 1, nchar(name) - 5)) %>%
                     unique()
                 )
             ) %>%
             lapply(function(names)
               sapply(names, function(name)
                 which(names(mod) == name)))
           mod <- mod %>%
             as.data.frame() %>%
             reshape(
               direction = "long",
               varying = varying,
               v.names = names(varying),
               timevar = "imp",
               times = 1:5
             ) %>%
             dplyr::select(-id)
           
           return(mod)
         })

## Split by model_id and imp
stayer_est <- stayer_est %>%
  lapply(.,
         function(mod) {
           mod %>%
             group_by(imp) %>%
             group_split()
         })

## Simulate
stayer_imp_sim <- stayer_est %>%
  lapply(.,
         function (mod) {
           b_sim <- lapply(mod,
                           function (imp) {
                             x_names <- imp$x_names
                             b <- as.matrix(imp[, 4])
                             vcov <- as.matrix(imp[, 5:ncol(imp)])
                             vcov <- vcov[, x_names]
                             set.seed(20230329)
                             sim <- MASS::mvrnorm(10000L, b, vcov)
                             
                             return(list(x_names = x_names,
                                         sim = sim))
                           }) 
           x_names <- b_sim[[1]]$x_names
           b_sim <- lapply(b_sim, function (obj)
             obj$sim) %>%
             do.call(rbind, .)
           colnames(b_sim) <- x_names
           
           return(b_sim)
         })

## Mediation analysis ----
## Effect of market rents on the mediator (household rents)
p1 <- ggvote_plot_mfx2(
  ames_data = aux_ames,
  desc_data = aux_desc,
  model_ids = 5,
  add_descriptives = TRUE,
  d = "cmr_arm_cwu",
  x1  = "Homeownership",
  x2 = "log_hinc_eq_cat",
  x2_continuous = FALSE,
  subset_x1 = "Renters",
  ytitle = "Effect of Market Rents (€/sqm)\non Household Rents (€/sqm)",
  xtitle = "Log. Equivalized Household Income",
  font_size = 15L
) +
  ggtitle("Indirect effect")

## Effect of the mediator (household rents)
p2 <- ggvote_plot_mfx2(
  ames_data = main_ames,
  desc_data = main_desc,
  model_ids = 389,
  add_descriptives = TRUE,
  d = "cold_rent_sqm_cwu",
  x1  = "Homeownership",
  x2 = "log_hinc_eq_cat",
  x2_continuous = FALSE,
  subset_x1 = "Renters",
  ytitle = "Effect of Household Rents (€/sqm)\non AfD Support",
  xtitle = "Log. Equivalized Household Income",
  font_size = 15L,
  ylim = c(-.02, .04)
) +
  ggtitle(" ")

## Total effect of market rents
p_te <- ggvote_plot_mfx2(
  ames_data = main_ames,
  desc_data = main_desc,
  model_ids = 386,
  add_descriptives = TRUE,
  d = "cmr_arm_cwu",
  x1  = "Homeownership",
  x2 = "log_hinc_eq_cat",
  x2_continuous = FALSE,
  subset_x1 = "Renters",
  ytitle = "Effect of Market Rents (€/sqm)\non AfD Support",
  xtitle = "Log. Equivalized Household Income",
  font_size = 15L,
  ylim = c(-.02, .04)
) +
  ggtitle("Total effect")

### Controlled direct effect of market rents
p_de <- ggvote_plot_mfx2(
  ames_data = main_ames,
  desc_data = main_desc,
  model_ids = 389,
  add_descriptives = TRUE,
  d = "cmr_arm_cwu",
  x1  = "Homeownership",
  x2 = "log_hinc_eq_cat",
  x2_continuous = FALSE,
  subset_x1 = "Renters",
  ytitle = "Effect of Market Rents (€/sqm)\non AfD Support",
  xtitle = "Log. Equivalized Household Income",
  font_size = 15L,
  ylim = c(-.02, .04)
) +
  ggtitle("Direct effect")

## Indirect effect of market rents that unfolds via household rents
main_aie <- main_ames %>%
  dplyr::filter(model_id == 386) %>%
  dplyr::filter(factor == "cmr_arm_cwu")

mod_vals <-
  aux_ames$log_hinc_eq_cat[aux_ames$model_id == 5 &
                             aux_ames$factor == "cmr_arm_cwu"]

for (n in seq_along(mod_vals)) {
  if (mod_vals[n] == "T1") {
    t_on_m <- stayer_imp_sim[[1]][, "cmr_arm_cwu"] 
    m_on_y <- stayer_imp_sim[[3]][, "cold_rent_sqm_cwu"]
  } else {
    t_on_m <- stayer_imp_sim[[1]][, "cmr_arm_cwu"] + 
      stayer_imp_sim[[1]][, paste0("cmr_arm_cwu.log_hinc_eq_cat", mod_vals[n])]
    m_on_y <- stayer_imp_sim[[3]][, "cold_rent_sqm_cwu"] +
      stayer_imp_sim[[3]][, paste0("log_hinc_eq_cat", mod_vals[n], ".cold_rent_sqm_cwu")]
  }
  
  main_aie$AME[n] <- median(t_on_m * m_on_y)
  main_aie$SE[n] <- sd(t_on_m * m_on_y)
  main_aie$lower[n] <- quantile(t_on_m * m_on_y, .025)
  main_aie$upper[n] <- quantile(t_on_m * m_on_y, .975)
}

p3 <- ggvote_plot_mfx2(
  ames_data = main_aie,
  desc_data = main_desc,
  model_ids = 386,
  add_descriptives = TRUE,
  d = "cmr_arm_cwu",
  x1  = "Homeownership",
  x2 = "log_hinc_eq_cat",
  x2_continuous = FALSE,
  subset_x1 = "Renters",
  ytitle = "Effect of Local Market Rents (€/sqm)\non household rents (€/sqm)",
  xtitle = "Log. Equivalized Household Income",
  font_size = 15L,
  ylim = c(-.02, .04)
) +
  ggtitle(" ")

## Plot effects ---- 
ggsave(
  filename = "mech-app-mediation-afd-1.pdf",
  plot = ggpubr::ggarrange(
    ggpubr::ggarrange(p1, p2, p3, nrow = 1),
    ggpubr::ggarrange(p_de, p_te, nrow = 1),
    ncol = 1
  ),
  path = "fig",
  width = 16,
  height = 12,
  dpi = 200,
  limitsize = FALSE
)


## Sensitivity ----
## Functions
pool_imputations <- function(df) {
  # Objects
  qoi <- unique(df$qoi)
  tertile <- unique(df$tertile)
  est <- t(df$est)
  se <- t(df$se)
  
  if ("rho" %in% names(df)) {
    rho <- unique(df$rho)
  } else {
    rho <- NA_real_
  }
  
  ## Point estimates
  est_pooled <-  apply(est, 1, mean)
  
  ## Standard errors
  pool_ses <- function(est, se) {
    m <- ncol(se)
    V_W <- apply(se, 1, function(x)
      mean(x ^ 2))
    V_B <- (1 - m) * apply(est, 1, function (x)
      sum(x - mean(x)))
    V_T <- V_W + V_B + V_B / m
    SE_pooled <- sqrt(V_T)
    return(SE_pooled)
  }
  ses_pooled <- pool_ses(est, se)
  
  return(data.frame(qoi = qoi,
                    tertile = tertile,
                    est = est_pooled,
                    se = ses_pooled,
                    rho = rho))
}

pool_imp <- function(df) {
  ## Pool estimates
  pooled_imputations <- pool_imputations(df)
  
  ## Add upper and lower bounds
  pooled_imputations$lower <-
    pooled_imputations$est + qnorm(.025) * pooled_imputations$se
  pooled_imputations$upper <-
    pooled_imputations$est + qnorm(.975) * pooled_imputations$se
  
  
  ## Return
  return(pooled_imputations)
}

## Estimates
med_res <- rio::import("est/mediation_results.csv") %>%
  dplyr::select(-V1) %>%
  dplyr::group_by(qoi, tertile) %>% 
  dplyr::select(est, se) %>%
  dplyr::group_split(.keep = TRUE) %>%
  lapply(function(df) {
    pool_imp(df)
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(tertile, qoi) %>%
  dplyr::mutate(log_hinc_eq_cat = paste0("T", tertile))

p_me <- ggmed_plot(
  ames_data = med_res,
  desc_data = aux_desc,
  model_ids = 19,
  add_descriptives = TRUE,
  d = "me",
  x = "log_hinc_eq_cat",
  ytitle = "Effect",
  xtitle = "",
  font_size = 15L,
  ylim = c(-.03, .03)
)

p_de <- ggmed_plot(
  ames_data = med_res,
  desc_data = aux_desc,
  model_ids = 19,
  add_descriptives = TRUE,
  d = "de",
  x = "log_hinc_eq_cat",
  ytitle = "Effect",
  xtitle = "",
  font_size = 15L,
  ylim = c(-.03, .03)
)

p_te <- ggmed_plot(
  ames_data = med_res,
  desc_data = aux_desc,
  model_ids = 19,
  add_descriptives = TRUE,
  d = "de",
  x = "log_hinc_eq_cat",
  ytitle = "Effect",
  xtitle = "",
  font_size = 15L,
  ylim = c(-.03, .03)
)

med_sen <- rio::import("est/mediation_sensitivity.csv") %>%
  dplyr::select(-V1, -dplyr::starts_with("re_")) %>%
  dplyr::rowwise() %>%
  dplyr::mutate(div_lower = abs(est - lower) / qnorm(.975),
                div_upper = abs(est - upper) / qnorm(.975),
                se = mean(c(div_upper, div_lower))) %>%
  dplyr::group_by(qoi, rho, tertile) %>% 
  dplyr::select(est, se) %>%
  dplyr::group_split(.keep = TRUE) %>%
  lapply(function(df) {
    pool_imp(df)
  }) %>%
  dplyr::bind_rows() %>%
  dplyr::arrange(tertile, qoi, rho) %>%
  dplyr::mutate(log_hinc_eq_cat = paste0("T", tertile))

p_sen <- ggplot2::ggplot(
  data = med_sen %>%
    dplyr::filter(qoi %in% c("de", "me")) %>%
    dplyr::mutate(qoi = ifelse(qoi == "de", "Direct Effect", "Indirect Effect")),
  aes(x = rho, y = est)
) +
  ggplot2::facet_grid(. ~ qoi ~ log_hinc_eq_cat) +
  ggplot2::geom_hline(yintercept = 0, lty = 3) +
  ggplot2::geom_vline(xintercept = 0, lty = 3) +
  ggplot2::geom_ribbon(aes(ymin = lower, ymax = upper), linetype = 1, alpha = 0.2) +
  ggplot2::geom_line() +
  ggplot2::ylab("") +
  ggplot2::xlab(expression(rho))
# T1: rho > -0.30; T2: rho > 0.075; T3: rho < 0.40

ggsave(
  filename = "mech-sensitivity-1.pdf",
  plot = p_sen,
  path = "fig",
  width = 8,
  height = 6,
  dpi = 200,
  limitsize = FALSE
)
